package com.heima.wemedia;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;

import java.io.IOException;

/**
 * Demo crawler test: opens {@link #PAGE_URL} in a Selenium-driven Chrome instance,
 * parses the rendered page source with Jsoup and prints, for every {@code <article>}
 * element, its detail link, its {@code <h4>} text and the {@code src}/{@code data-src}
 * attributes of its images.
 *
 * @Author XHui
 * @Since 2024/2/17 16:25
 * @Version 1.0
 */
public class ReptilesDemo {

    /** Page that is loaded in the browser and scraped. */
    private static final String PAGE_URL = "https://3g.163.com/touch/ent/?ver=c&clickfrom=index2018_header_main";

    /** Prefix prepended to each article's {@code href} when printing the detail link. */
    private static final String HOST = "https://m.163.com";

    /** System property Selenium reads to locate the chromedriver binary. */
    private static final String DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Fallback chromedriver location, used only when {@link #DRIVER_PROPERTY} is not already set. */
    private static final String DEFAULT_DRIVER_PATH = "F:\\Software\\browser\\chrome\\chromedriver.exe";

    /**
     * Loads the page in Chrome, parses the resulting HTML and prints the extracted
     * article data to standard output.
     *
     * @throws IOException kept in the signature for compatibility; the current body does not throw it
     */
    @Test
    void test() throws IOException {
        // Respect a driver path supplied from outside (e.g. -Dwebdriver.chrome.driver=...)
        // and fall back to the original hard-coded location otherwise.
        if (System.getProperty(DRIVER_PROPERTY) == null) {
            System.setProperty(DRIVER_PROPERTY, DEFAULT_DRIVER_PATH);
        }

        WebDriver driver = new ChromeDriver();
        Document document;
        try {
            driver.get(PAGE_URL);
            // Parse the page source as reported by the browser rather than fetching it with Jsoup directly.
            document = Jsoup.parse(driver.getPageSource());
        } finally {
            // Always shut the browser down; otherwise every run leaves a Chrome/chromedriver process behind.
            driver.quit();
        }

        Elements articleElems = document.getElementsByTag("article");
        for (Element article : articleElems) {
            printArticle(article);
        }
    }

    /** Prints the detail link, title and image attributes of a single {@code <article>} element. */
    private static void printArticle(Element article) {
        // The link is read from the element that directly wraps the article.
        // parent() can be null for a root element, so fall back to an empty href in that case.
        Element parent = article.parent();
        String href = parent != null ? parent.attr("href") : "";
        System.out.println("文章详情：" + HOST + href);

        String title = article.getElementsByTag("h4").text();
        System.out.println("title = " + title);

        for (Element image : article.getElementsByTag("img")) {
            // Both attributes are printed as-is; attr() yields an empty string when one is absent.
            System.out.println("src = " + image.attr("src"));
            System.out.println("dataSrc = " + image.attr("data-src"));
        }
    }
}
